################################################################################
# Created By: Pietryka
# Creation Date:  2016-08-22
# Purpose: Clean variables for CPS Turnout analysis
# Questions: mpietryka@fsu.edu
# Notes: Before running this code, download the following datasets:
#        1. Current Population Survey, November 2010: Voting and Registration 
#           Supplement (ICPSR 31082). https://doi.org/10.3886/ICPSR31082.v1
#        2. Current Population Survey, November 2014: Voting and Registration 
#           Supplement (ICPSR 36386).  https://doi.org/10.3886/ICPSR36386.v1
# 
################################################################################


# PREAMBLE =============================================



# LOAD PACKAGES  -----------------------------

library(foreign)
library(dplyr)
library(readr)

# Function to recode negative values as missing ---------

# Recode negative values as missing.
#
# Args:
#   x: A vector that can be compared with 0 (e.g. a numeric survey item).
#
# Returns:
#   `x` with every negative element replaced by NA. The storage type and
#   length of `x` are kept, even when all elements are negative or `x` is
#   empty (where ifelse() would fall back to a logical vector).
to_missing <- function(x) {
  # which() ignores NA comparisons, so values already missing are left alone
  x[which(x < 0)] <- NA
  x
}

# LOAD DATA -----------------------------

# November 2010 Voting and Registration Supplement
# ICPSR 31082 -- https://doi.org/10.3886/ICPSR31082.v1
# Tab-delimited file, expected under Data/ relative to the working directory.
cps_10 <- readr::read_tsv(file = "Data/31082-0001-Data.tsv")

# November 2014 Voting and Registration Supplement
# ICPSR 36386 -- https://doi.org/10.3886/ICPSR36386.v1
cps_14 <- readr::read_tsv(file = "Data/36386-0001-Data.tsv")




#  Clean variables  -----------------------------


# Build the analysis variables for the 2010 file.
#
# Derived columns (turnout, age, educ, famincome, female, student, prekkids,
# numberkids0_17) are added next to the raw CPS items. Negative codes in the
# raw items used as covariates are then recoded to NA.
# NOTE(review): the meaning of the individual CPS codes below is not visible
# in this script -- confirm against the ICPSR codebook.
cps_clean_10 <- cps_10 %>%
  tibble() %>%
  mutate(
    # PES1 of 1 becomes turnout 1, 2 becomes 0, any other code becomes NA
    turnout = case_when(
      PES1 == 1 ~ 1L,
      PES1 == 2 ~ 0L,
      TRUE ~ NA_integer_
    ),
    age = PEAGE %>% na_if(-1),
    educ = PEEDUCA %>% na_if(-1),
    # Shift positive income codes down by one; non-positive codes are missing.
    # as.integer() keeps both branches integer so if_else()'s type check
    # passes no matter how readr parsed the column (integer or double).
    famincome = if_else(
      HEFAMINC <= 0,
      NA_integer_,
      as.integer(HEFAMINC) - 1L,
      missing = NA_integer_
    ),
    female = case_when(
      PESEX == 2 ~ 1L,
      PESEX == 1 ~ 0L,
      TRUE ~ NA_integer_
    ),
    # Order matters: PENLFACT == 3 wins over a missing PEMLR. This must run
    # before PEMLR's -1 codes are turned into NA further down.
    student = case_when(
      PENLFACT == 3 ~ 1L,
      PEMLR == -1 ~ NA_integer_,
      TRUE ~ 0L
    ),
    # Treatment flag: PRCHLD codes 1, 2, 5 -> 1; code 0 -> 0; otherwise NA
    prekkids = case_when(
      PRCHLD %in% c(1, 2, 5) ~ 1L,
      PRCHLD == 0 ~ 0L,
      TRUE ~ NA_integer_
    ),
    numberkids0_17 = PRNMCHLD %>% na_if(-1)
  ) %>%
  # across() replaces the deprecated mutate_each(funs(...)) idiom
  mutate(across(c(PEMARITL, PTDTRACE, PEMLR, PES8), to_missing)) %>%
  # Collapse every PTDTRACE code above 5 into a single category 6
  mutate(PTDTRACE = ifelse(PTDTRACE > 5, 6, PTDTRACE))


# Build the analysis variables for the 2014 file.
#
# Mirrors the 2010 cleaning step with two differences visible here: age is
# read from PRTAGE, and PRS8 is renamed to PES8 so both years share the same
# column name.
# NOTE(review): the meaning of the individual CPS codes below is not visible
# in this script -- confirm against the ICPSR codebook.
cps_clean_14 <- cps_14 %>%
  tibble() %>%
  mutate(
    # PES1 of 1 becomes turnout 1, 2 becomes 0, any other code becomes NA
    turnout = case_when(
      PES1 == 1 ~ 1L,
      PES1 == 2 ~ 0L,
      TRUE ~ NA_integer_
    ),
    age = PRTAGE %>% na_if(-1),
    educ = PEEDUCA %>% na_if(-1),
    # Shift positive income codes down by one; non-positive codes are missing.
    # as.integer() keeps both branches integer so if_else()'s type check
    # passes no matter how readr parsed the column (integer or double).
    famincome = if_else(
      HEFAMINC <= 0,
      NA_integer_,
      as.integer(HEFAMINC) - 1L,
      missing = NA_integer_
    ),
    female = case_when(
      PESEX == 2 ~ 1L,
      PESEX == 1 ~ 0L,
      TRUE ~ NA_integer_
    ),
    # Order matters: PENLFACT == 3 wins over a missing PEMLR. This must run
    # before PEMLR's -1 codes are turned into NA further down.
    student = case_when(
      PENLFACT == 3 ~ 1L,
      PEMLR == -1 ~ NA_integer_,
      TRUE ~ 0L
    ),
    # Treatment flag: PRCHLD codes 1, 2, 5 -> 1; code 0 -> 0; otherwise NA
    prekkids = case_when(
      PRCHLD %in% c(1, 2, 5) ~ 1L,
      PRCHLD == 0 ~ 0L,
      TRUE ~ NA_integer_
    ),
    numberkids0_17 = PRNMCHLD %>% na_if(-1)
  ) %>%
  # Align the 2014 column name with the 2010 file before recoding
  rename(PES8 = PRS8) %>%
  # across() replaces the deprecated mutate_each(funs(...)) idiom
  mutate(across(c(PEMARITL, PTDTRACE, PEMLR, PES8), to_missing)) %>%
  # Collapse every PTDTRACE code above 5 into a single category 6
  mutate(PTDTRACE = ifelse(PTDTRACE > 5, 6, PTDTRACE))


#  SUBSET DATA =====================================


# Outcome and treatment variables
main_vars <- c("turnout", "prekkids")


# Covariates for matching: recoded measures first, then raw CPS items
covariates <- c(
  "educ", "famincome", "age", "female", "student",
  "PEMARITL", "PTDTRACE", "PEMLR", "PES8"
)


# Keep respondents with a recorded turnout value and fewer than two children
# (numberkids0_17 < 2 also drops rows where the count is NA), then reduce each
# year to the outcome, treatment, and matching covariates.
#
# A separate PES1 != -1 filter is unnecessary: turnout is only non-missing
# when PES1 is 1 or 2.
# all_of() replaces the superseded one_of() and stops with an error if a
# listed column is absent, instead of only warning.
cps_sub_10 <- cps_clean_10 %>%
  filter(!is.na(turnout), numberkids0_17 < 2) %>%
  select(all_of(main_vars), all_of(covariates))

cps_sub_14 <- cps_clean_14 %>%
  filter(!is.na(turnout), numberkids0_17 < 2) %>%
  select(all_of(main_vars), all_of(covariates))


#  Save =====================================


# Write both analysis subsets and the two variable-name vectors to one file.
save(
  list = c("cps_sub_10", "cps_sub_14", "main_vars", "covariates"),
  file = "Data/CPS-1A-Clean-Turnout.Rdata"
)
